{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "4fb92694-6f80-4e40-910c-3aa044027185",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import glob, os\n",
    "import socket \n",
    "import matplotlib.pyplot as plt\n",
    "import pytz\n",
    "import datetime\n",
    "from datetime import datetime, date, timedelta\n",
    "from linearmodels import PanelOLS\n",
    "\n",
    "pd.set_option('display.max_rows', 500)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ed8f5d97-8453-4332-ad36-8e29956d14f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary Statistics\n",
    "\n",
    "def fn_get_sumstats_disclosure(df_f, select_var,index_cv):\n",
    "    df_f = df_f[[index_cv,select_var] ]#.dropna()\n",
    "\n",
    "    order_columns = ['count', 'mean', 'std', '5%', '25%', '50%', '75%', '95%'] \n",
    "    \n",
    "    df_stats_tot = df_f[[select_var]].describe(percentiles = [0.05,0.25,0.5,0.75,0.95]).T.drop(['min','max'], axis = 1)\n",
    "    df_stats_tot.index = ['Total']\n",
    "    \n",
    "    df_stats_cond =df_f.groupby([index_cv])[select_var].describe(percentiles = [0.05,0.25,0.5,0.75,0.95]).drop(['min','max'], axis = 1)\n",
    "    df_stats_cond = df_stats_cond.rename(index = {0:'Not disclosed',1:'Disclosed'})\n",
    "    df_stats = pd.concat([df_stats_tot,df_stats_cond])\n",
    "    \n",
    "    return df_stats\n",
    "\n",
    "df_comp_sort = pd.read_stata('CompanyExposureMEasures.dta')\n",
    "\n",
    "df_f = df_comp_sort.copy()\n",
    "\n",
    "select_var = 'sort_intl_incomeshare_avg3'\n",
    "df_intl_PC = fn_get_sumstats_disclosure(df_f, select_var, 'Mentions_PC')\n",
    "df_intl_10k = fn_get_sumstats_disclosure(df_f, select_var,'Mentions_10K')\n",
    "\n",
    "select_var = 'sort_below_mintax_bycountry_at'\n",
    "df_below_mintax_PC = fn_get_sumstats_disclosure(df_f, select_var, 'Mentions_PC')\n",
    "df_below_mintax_10k = fn_get_sumstats_disclosure(df_f, select_var,'Mentions_10K')\n",
    "\n",
    "select_var = 'taxspread_firm_guo_m'\n",
    "df_tax_spread_PC = fn_get_sumstats_disclosure(df_f, select_var, 'Mentions_PC')\n",
    "df_tax_spread_10k = fn_get_sumstats_disclosure(df_f, select_var,'Mentions_10K')\n",
    "\n",
    "select_var = 'sort_intan_ratioassets'\n",
    "df_tax_intan_PC = fn_get_sumstats_disclosure(df_f, select_var, 'Mentions_PC')\n",
    "df_tax_intan_10k = fn_get_sumstats_disclosure(df_f, select_var,'Mentions_10K')\n",
    "\n",
    "select_var = 'scaled_haven'\n",
    "df_tax_scaled_PC = fn_get_sumstats_disclosure(df_f, select_var, 'Mentions_PC')\n",
    "df_tax_scaled_10k = fn_get_sumstats_disclosure(df_f, select_var,'Mentions_10K')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "c6f278ae-2214-4f41-9497-9fe4894e412e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# scatter plot\n",
    "\n",
    "sorting_variables = ['sort_intl_incomeshare_avg3','sort_foreigntaxbenefit_3y_at','sort_below_mintax_bycountry_at']\n",
    "\n",
    "df_highfreq_returns = pd.read_stata('StockReturnsIntraDay.dta')\n",
    "df_highfreq = pd.merge(df_highfreq_returns, df_comp_sort, how = 'inner', on= 'ticker')\n",
    "\n",
    "for select_var in sorting_variables:\n",
    "    \n",
    "    df_scatterplot = df_highfreq.copy()    \n",
    "\n",
    "    df_scatterplot[select_var] = df_scatterplot[select_var].astype(float)\n",
    "\n",
    "    df_scatterplot['quantile_ex_3'] = pd.qcut(df_scatterplot[select_var],10,duplicates='drop')\n",
    "\n",
    "    df_scatterplot = df_scatterplot.groupby(['quantile_ex_3'])[[select_var,select_return]].mean().reset_index().reset_index()\n",
    "\n",
    "    import matplotlib.pyplot as plt\n",
    "    import seaborn as sns\n",
    "    x = df_scatterplot[select_var]\n",
    "    y = df_scatterplot['return_stock']\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(10, 10))\n",
    "\n",
    "    ax.scatter(x , y,s=100, color = 'darkblue')\n",
    "\n",
    "    m, b = np.polyfit(x, y, 1)\n",
    "    plt.plot(x, m*x+b,color ='r',linewidth=3)\n",
    "\n",
    "    plt.ylabel('Stock returns in basis points', fontsize='22')\n",
    "\n",
    "    if select_var == 'sort_intl_incomeshare_avg3':\n",
    "        plt.xlabel('Foreign Earnings Ratio', fontsize='22')\n",
    "        name_figure = 'fig_binscatter_intlincomeshare.pdf' \n",
    "\n",
    "    if select_var == 'sort_foreigntaxbenefit_3y_at':\n",
    "        plt.xlabel('Foreign Low Tax Benefit', fontsize='22')\n",
    "        name_figure = 'fig_binscatter_foreigntaxbenefit_3y_at.pdf' \n",
    "\n",
    "    if select_var == 'sort_below_mintax_bycountry_at':\n",
    "        plt.xlabel('Foreign Tax Differential to Minimum Tax', fontsize='22')\n",
    "        name_figure = 'fig_binscatter_below_mintax_bycountry_at.pdf'      \n",
    "\n",
    "    ax.spines['top'].set_visible(False)\n",
    "    ax.spines['right'].set_visible(False)  \n",
    "    ax.xaxis.set_tick_params(labelsize=20)\n",
    "    ax.yaxis.set_tick_params(labelsize=20)\n",
    "    plt.tight_layout()\n",
    "    \n",
    "    plt.savefig(name_figure)  \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "27d82cf6-6062-4445-853a-28c1854eb14c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Regression table\n",
    "important_days = ['2021-01-14T10:30:00.000000000','2021-07-01T12:19:26.000000000', '2021-10-08T13:01:00.000000000']\n",
    "exog_var_results = ['Const','Const_T',  'ForeignEarnings','ForeignEarnings_T', 'ForeignTaxBenefit', 'ForeignTaxBenefit_T','ForeignTax_belowMin','ForeignTax_belowMin_T','R2','nObs']\n",
    "columns_sort_reg = ['sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z','ALL']\n",
    "coef_results = pd.DataFrame('',columns = columns_sort_reg, index = exog_var_results )\n",
    "\n",
    "for select_var in columns_sort_reg:\n",
    "\n",
    "    if select_var != 'ALL':\n",
    "        df_reg = df_highfreq[['Date','return_stock', select_var,'ticker']].dropna()    \n",
    "        df_reg = df_reg[df_reg['Date'].isin(important_days)]\n",
    "        df_reg = df_reg.set_index(['ticker','Date'])\n",
    "        df_reg['Const'] = 1\n",
    "        exog_var = ['Const',select_var]\n",
    "\n",
    "        exog = df_reg[exog_var ]\n",
    "        enog  = df_reg['return_stock'] \n",
    "        mod = PanelOLS(enog, exog)\n",
    "        results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "        params = results.params\n",
    "        tvalues = results.tstats\n",
    "        nobs = results.nobs\n",
    "        R2 = results.rsquared*100\n",
    "\n",
    "        coef_results[select_var].loc['Const'] = params[0]\n",
    "        coef_results[select_var].loc['Const_T'] = tvalues[0]    \n",
    "        \n",
    "        if select_var == 'sort_intl_incomeshare_avg3_Z':\n",
    "            coef_results[select_var].loc['ForeignEarnings'] = params[1]\n",
    "            coef_results[select_var].loc['ForeignEarnings_T'] = tvalues[1]  \n",
    "\n",
    "        if select_var == 'sort_foreigntaxbenefit_3y_at_Z':\n",
    "            coef_results[select_var].loc['ForeignTaxBenefit'] = params[1]\n",
    "            coef_results[select_var].loc['ForeignTaxBenefit_T'] = tvalues[1]\n",
    "\n",
    "        if select_var == 'sort_below_mintax_bycountry_at_Z':\n",
    "            coef_results[select_var].loc['ForeignTax_belowMin'] = params[1]\n",
    "            coef_results[select_var].loc['ForeignTax_belowMin_T'] = tvalues[1]\n",
    "            \n",
    "        coef_results[select_var].loc['R2'] = R2\n",
    "        coef_results[select_var].loc['nObs'] = nobs  \n",
    "        \n",
    "    if select_var == 'ALL': \n",
    "\n",
    "        df_reg = df_highfreq[['Date','return_stock', 'sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z','ticker']].dropna()  \n",
    "        df_reg = df_reg[df_reg['Date'].isin(important_days)]\n",
    "        df_reg = df_reg.set_index(['ticker','Date'])\n",
    "        df_reg['Const'] = 1\n",
    "        exog_var = ['Const','sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z']\n",
    "        exog = df_reg[exog_var ]\n",
    "        enog  = df_reg[return_select] \n",
    "        mod = PanelOLS(enog, exog)\n",
    "        results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "        params = results.params\n",
    "        tvalues = results.tstats\n",
    "        nobs = results.nobs\n",
    "        R2 = results.rsquared*100\n",
    "\n",
    "        coef_results[select_var].loc['Const'] = params[0]\n",
    "        coef_results[select_var].loc['Const_T'] = tvalues[0]    \n",
    "        coef_results[select_var].loc['ForeignEarnings'] = params[1]\n",
    "        coef_results[select_var].loc['ForeignEarnings_T'] = tvalues[1]  \n",
    "        coef_results[select_var].loc['ForeignTaxBenefit'] = params[2]\n",
    "        coef_results[select_var].loc['ForeignTaxBenefit_T'] = tvalues[2]          \n",
    "        coef_results[select_var].loc['ForeignTax_belowMin'] = params[3]\n",
    "        coef_results[select_var].loc['ForeignTax_belowMin_T'] = tvalues[3]   \n",
    "        coef_results[select_var].loc['R2'] = R2\n",
    "        coef_results[select_var].loc['nObs'] = nobs   \n",
    "                 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e7eb239-a6f2-4276-9c81-5db54d6e820b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# all days\n",
    "exog_var_results = ['Const','Const_T',  'ForeignEarnings','ForeignEarnings_T', 'ForeignTaxBenefit', 'ForeignTaxBenefit_T','ForeignTax_belowMin','ForeignTax_belowMin_T','R2','nObs']\n",
    "columns_sort_reg = ['sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z','ALL']\n",
    "coef_results = pd.DataFrame('',columns = columns_sort_reg, index = exog_var_results )\n",
    "\n",
    "for select_var in columns_sort_reg:\n",
    "\n",
    "    if select_var != 'ALL':\n",
    "        df_reg = df_highfreq[['Date','return_stock', select_var,'ticker']].dropna()    \n",
    "        \n",
    "        df_reg = df_reg.set_index(['ticker','Date'])\n",
    "        df_reg['Const'] = 1\n",
    "        exog_var = ['Const',select_var]\n",
    "\n",
    "        exog = df_reg[exog_var ]\n",
    "        enog  = df_reg['return_stock'] \n",
    "        mod = PanelOLS(enog, exog)\n",
    "        results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "        params = results.params\n",
    "        tvalues = results.tstats\n",
    "        nobs = results.nobs\n",
    "        R2 = results.rsquared*100\n",
    "\n",
    "        coef_results[select_var].loc['Const'] = params[0]\n",
    "        coef_results[select_var].loc['Const_T'] = tvalues[0]    \n",
    "        \n",
    "        if select_var == 'sort_intl_incomeshare_avg3_Z':\n",
    "            coef_results[select_var].loc['ForeignEarnings'] = params[1]\n",
    "            coef_results[select_var].loc['ForeignEarnings_T'] = tvalues[1]  \n",
    "\n",
    "        if select_var == 'sort_foreigntaxbenefit_3y_at_Z':\n",
    "            coef_results[select_var].loc['ForeignTaxBenefit'] = params[1]\n",
    "            coef_results[select_var].loc['ForeignTaxBenefit_T'] = tvalues[1]\n",
    "\n",
    "        if select_var == 'sort_below_mintax_bycountry_at_Z':\n",
    "            coef_results[select_var].loc['ForeignTax_belowMin'] = params[1]\n",
    "            coef_results[select_var].loc['ForeignTax_belowMin_T'] = tvalues[1]\n",
    "            \n",
    "        coef_results[select_var].loc['R2'] = R2\n",
    "        coef_results[select_var].loc['nObs'] = nobs  \n",
    "        \n",
    "    if select_var == 'ALL': \n",
    "\n",
    "        df_reg = df_highfreq[['Date','return_stock', 'sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z','ticker']].dropna()  \n",
    "        df_reg = df_reg.set_index(['ticker','Date'])\n",
    "        df_reg['Const'] = 1\n",
    "        exog_var = ['Const','sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z']\n",
    "        exog = df_reg[exog_var ]\n",
    "        enog  = df_reg[return_select] \n",
    "        mod = PanelOLS(enog, exog)\n",
    "        results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "        params = results.params\n",
    "        tvalues = results.tstats\n",
    "        nobs = results.nobs\n",
    "        R2 = results.rsquared*100\n",
    "\n",
    "        coef_results[select_var].loc['Const'] = params[0]\n",
    "        coef_results[select_var].loc['Const_T'] = tvalues[0]    \n",
    "        coef_results[select_var].loc['ForeignEarnings'] = params[1]\n",
    "        coef_results[select_var].loc['ForeignEarnings_T'] = tvalues[1]  \n",
    "        coef_results[select_var].loc['ForeignTaxBenefit'] = params[2]\n",
    "        coef_results[select_var].loc['ForeignTaxBenefit_T'] = tvalues[2]          \n",
    "        coef_results[select_var].loc['ForeignTax_belowMin'] = params[3]\n",
    "        coef_results[select_var].loc['ForeignTax_belowMin_T'] = tvalues[3]   \n",
    "        coef_results[select_var].loc['R2'] = R2\n",
    "        coef_results[select_var].loc['nObs'] = nobs   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6eb09ab-6854-4f99-a8b8-c75aa5593486",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Using additional exposure measures\n",
    "exog_var_results = ['Const','Const_T', 'X','X_T', 'ForeignEarnings','ForeignEarnings_T', 'ForeignTaxBenefit', 'ForeignTaxBenefit_T','ForeignTax_belowMin','ForeignTax_belowMin_T', 'R2','nObs']\n",
    "\n",
    "return_select = 'return_stock'\n",
    "\n",
    "columns_sort_reg = ['X','ALL']\n",
    "coef_results = pd.DataFrame('',columns = columns_sort_reg, index = exog_var_results )\n",
    "\n",
    "select_extra = 'sort_intan_ratioassets_Z'\n",
    "\n",
    "for select_var in columns_sort_reg:\n",
    "    \n",
    "    if select_var == 'X': \n",
    "\n",
    "        X_columns =  [select_extra]   \n",
    "        df_reg = df_highfreq[['Date','return_stock','ticker'] + X_columns].dropna()  \n",
    "        df_reg = df_reg[df_reg['Date'].isin(important_days)]\n",
    "\n",
    "        df_reg = df_reg.set_index(['ticker','Date'])\n",
    "        df_reg['Const'] = 1\n",
    "        exog_var = ['Const',select_extra]\n",
    "        exog = df_reg[exog_var ]\n",
    "        enog  = df_reg['return_stock'] \n",
    "        mod = PanelOLS(enog, exog)\n",
    "        results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "        params = results.params\n",
    "        tvalues = results.tstats\n",
    "        nobs = results.nobs\n",
    "        R2 = results.rsquared*100\n",
    "\n",
    "        coef_results[select_var].loc['Const'] = params[0]\n",
    "        coef_results[select_var].loc['Const_T'] = tvalues[0]\n",
    "        coef_results[select_var].loc['X'] = params[1]\n",
    "        coef_results[select_var].loc['X_T'] = tvalues[1]    \n",
    "        coef_results[select_var].loc['R2'] = R2\n",
    "        coef_results[select_var].loc['nObs'] = nobs  \n",
    "\n",
    "    if select_var == 'ALL':  \n",
    "    \n",
    "        X_columns =  [select_extra,'sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z']       \n",
    "        df_reg = df_highfreq[['Date',return_select,'ticker'] + X_columns].dropna()  \n",
    "        df_reg = df_reg[df_reg['Date'].isin(important_days)]\n",
    "        df_reg = df_reg.set_index(['ticker','Date'])\n",
    "        df_reg['Const'] = 1\n",
    "        exog_var = ['Const'] + X_columns\n",
    "        exog = df_reg[exog_var ]\n",
    "        enog  = df_reg['return_stock'] \n",
    "        mod = PanelOLS(enog, exog)\n",
    "        results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "        params = results.params\n",
    "        tvalues = results.tstats\n",
    "        nobs = results.nobs\n",
    "        R2 = results.rsquared*100\n",
    "\n",
    "        coef_results[select_var].loc['Const'] = params[0]\n",
    "        coef_results[select_var].loc['Const_T'] = tvalues[0]\n",
    "        coef_results[select_var].loc['X'] = params[1]\n",
    "        coef_results[select_var].loc['X_T'] = tvalues[1]    \n",
    "        coef_results[select_var].loc['ForeignEarnings'] = params[2]\n",
    "        coef_results[select_var].loc['ForeignEarnings_T'] = tvalues[2]  \n",
    "        coef_results[select_var].loc['ForeignTaxBenefit'] = params[3]\n",
    "        coef_results[select_var].loc['ForeignTaxBenefit_T'] = tvalues[3]          \n",
    "        coef_results[select_var].loc['ForeignTax_belowMin'] = params[4]\n",
    "        coef_results[select_var].loc['ForeignTax_belowMin_T'] = tvalues[4]  \n",
    "        coef_results[select_var].loc['R2'] = R2\n",
    "        coef_results[select_var].loc['nObs'] = nobs  \n",
    "\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "0da97883-3d62-4975-b21f-eeec3aef8c75",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Event-by-event regression\n",
    "\n",
    "df_reg_tot = df_highfreq.copy()\n",
    "\n",
    "exog_var_results = ['Const','Const_T',  'ForeignEarnings','ForeignEarnings_T', 'ForeignTaxBenefit', 'ForeignTaxBenefit_T','ForeignTax_belowMin','ForeignTax_belowMin_T', 'R2','nObs']\n",
    "\n",
    "coef_results = pd.DataFrame('',columns = list_days, index = exog_var_results )\n",
    "list_days = list(df_highfreq['Date'].unique())\n",
    "\n",
    "for index_day in list_days:\n",
    "    \n",
    "    df_reg_day =df_highfreq[ df_highfreq['Date'] == index_day].copy()\n",
    "    df_reg_day = df_reg_day[['Date','return_stock', 'sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z','ticker']].dropna()   \n",
    "    df_reg_day = df_reg_day.set_index(['ticker','Date'])\n",
    "    df_reg_day['Const'] = 1\n",
    "    exog_var = ['Const','sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z']\n",
    "    exog = df_reg_day[exog_var ]\n",
    "    enog  = df_reg_day['return_stock'] \n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True )\n",
    "    params = results.params\n",
    "    tvalues = results.tstats\n",
    "    nobs = results.nobs\n",
    "    R2 = results.rsquared*100\n",
    "\n",
    "    coef_results[index_day].loc['Const'] = params[0]\n",
    "    coef_results[index_day].loc['Const_T'] = tvalues[0]        \n",
    "    coef_results[index_day].loc['ForeignEarnings'] = params[1]\n",
    "    coef_results[index_day].loc['ForeignEarnings_T'] = tvalues[1]  \n",
    "    coef_results[index_day].loc['ForeignTaxBenefit'] = params[2]\n",
    "    coef_results[index_day].loc['ForeignTaxBenefit_T'] = tvalues[2]          \n",
    "    coef_results[index_day].loc['ForeignTax_belowMin'] = params[3]\n",
    "    coef_results[index_day].loc['ForeignTax_belowMin_T'] = tvalues[3]  \n",
    "    coef_results[index_day].loc['R2'] = R2\n",
    "    coef_results[index_day].loc['nObs'] = nobs      \n",
    "  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "20228623-c9fa-4df3-8f89-d6a6b9f588a0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Full value effect\n",
    "\n",
    "df_prob = pd.read_stata('OptionImpliedProbability.dta.dta')\n",
    "lower_bound =  df_prob['p'].mean()\n",
    "\n",
    "df_mkt  = pd.read_stata('MarketCap.dta')\n",
    "df_mkt_day = df_mkt[df_mkt['Date_day'] == np.datetime64('2021-06-30')].copy().drop(['Date_day'], axis = 1).rename(columns = {'mcap':'mcap_day'})\n",
    "\n",
    "exog_var_results = ['Const','Const_T',  'ForeignEarnings','ForeignEarnings_T', 'ForeignTaxBenefit', 'ForeignTaxBenefit_T','ForeignTax_belowMin','ForeignTax_belowMin_T',\n",
    "                   '1_over_pi', 'Total_effect', 'Dollar_amount']\n",
    "\n",
    "columns_sort = ['ALL','100-31','80-31','60-31','50-31']\n",
    "coef_results = pd.DataFrame(columns = columns_sort, index = exog_var_results )\n",
    "\n",
    "for select_var in columns_sort:  \n",
    "    df_reg_tot = df_highfreq[df_highfreq['Date'] == np.datetime64('2021-07-01T12:19:26.000000000')].copy()\n",
    "    df_reg_tot = df_reg_tot.set_index(['ticker','Date'])  \n",
    "    df_reg_tot['Const'] = 1\n",
    "\n",
    "    exog_var = ['Const','sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z']\n",
    "    exog = df_reg_tot[exog_var]\n",
    "\n",
    "    if select_var == 'ALL':\n",
    "        high_p = 1\n",
    "        lower_p = 0  \n",
    "\n",
    "    if select_var == '100-30':\n",
    "        high_p = 1\n",
    "        lower_p = lower_bound   \n",
    "\n",
    "    if select_var == '80-30':\n",
    "        high_p = 0.8\n",
    "        lower_p = lower_bound       \n",
    "\n",
    "    if select_var == '60-30':\n",
    "        high_p = 0.6\n",
    "        lower_p = lower_bound   \n",
    "\n",
    "    if select_var == '50-30':\n",
    "        high_p = 0.5\n",
    "        lower_p = lower_bound \n",
    "        \n",
    "    enog  = df_reg_tot['return_stock']/(high_p - lower_p)    \n",
    "    \n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True )\n",
    "    params = results.params\n",
    "    tvalues = results.tstats\n",
    "    nobs = results.nobs\n",
    "    R2 = results.rsquared*100\n",
    "\n",
    "    coef_results[select_var].loc['Const'] = params[0]\n",
    "    coef_results[select_var].loc['Const_T'] = tvalues[0]      \n",
    "    coef_results[select_var].loc['ForeignEarnings'] = params[1]\n",
    "    coef_results[select_var].loc['ForeignEarnings_T'] = tvalues[1]  \n",
    "    coef_results[select_var].loc['ForeignTaxBenefit'] = params[2]\n",
    "    coef_results[select_var].loc['ForeignTaxBenefit_T'] = tvalues[2]          \n",
    "    coef_results[select_var].loc['ForeignTax_belowMin'] = params[3]\n",
    "    coef_results[select_var].loc['ForeignTax_belowMin_T'] = tvalues[3]  \n",
    "    coef_results[select_var].loc['1_over_pi'] = 1/(high_p-lower_p)  \n",
    "    \n",
    "    df_temp = pd.merge(df_reg_tot, df_mkt_day, how = 'left', on = ['permno'])\n",
    "    df_temp['mcap_billions'] = df_temp['mcap_day']/(1000000)\n",
    "    \n",
    "    df_temp['Total_cost'] =   coef_results[select_var].loc['Const']+ (  coef_results[select_var].loc['ForeignEarnings'] ) *  df_temp['sort_intl_incomeshare_avg3_Z'] + \\\n",
    "                        (coef_results[select_var].loc['ForeignTaxBenefit']) *  df_temp['sort_foreigntaxbenefit_3y_at_Z'] + \\\n",
    "                        (coef_results[select_var].loc['ForeignTax_belowMin'] ) *  df_temp['sort_below_mintax_bycountry_at_Z'] \n",
    "\n",
    "    df_temp['Total_cost_dollar'] = (df_temp['Total_cost']/(100*100))*(df_temp['mcap_billions'])\n",
    "\n",
    "    coef_results[select_var].loc['Dollar_amount'] = df_temp['Total_cost_dollar'].sum()\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "8ed4b55b-9c73-4939-9c91-6ec8cf579a06",
   "metadata": {},
   "outputs": [],
   "source": [
    "# daily returns\n",
    "from pandas.tseries.offsets import *\n",
    "\n",
    "df_returns_tot = pd.read_stata('AdjustedReturns_daily.dta')\n",
    "\n",
    "df_returns_tot['index_event'] = np.where(df_returns_tot['date'].isin(EventsBusinessDays ), 1, 0)\n",
    "df_returns_tot['index_not_event'] = np.where(~df_returns_tot['date'].isin(EventsBusinessDays ), 1, 0)\n",
    "\n",
    "df_reg_tot = pd.merge(df_returns_tot, df_comp_sort, how = 'inner', on = 'permno')\n",
    "\n",
    "exog_var_results = ['TaxEvent \\times Foreign Earnings Ratio','TaxEvent \\times Foreign Earnings Ratio T',  \n",
    "                    'TaxEvent \\times Foreign Low Tax Benefit To Assets','TaxEvent \\times Foreign Low Tax Benefit To Assets T',\n",
    "                    'TaxEvent \\times ForeignTax_belowMin','TaxEvent \\times ForeignTax_belowMin_T',                          \n",
    "                    'Foreign Earnings Ratio','Foreign Earnings Ratio T',                        \n",
    "                    'Foreign Low Tax Benefit To Assets','Foreign Low Tax Benefit To Assets T',  \n",
    "                    'ForeignTax_belowMin','ForeignTax_belowMin_T',   \n",
    "                    'TaxEvent','TaxEvent_T',                    \n",
    "                    'const','const_T',\n",
    "                    'R2', 'nObs']\n",
    "\n",
    "columns_sort = ['Raw','CAPM','FF3','FF4']\n",
    "coef_results = pd.DataFrame(columns = columns_sort, index = exog_var_results )\n",
    "\n",
    "df_reg = df_reg_tot[['date','ret','exret_FF1','exret_FF3','exret_FF4', 'sort_intl_incomeshare_avg3_Z','sort_foreigntaxbenefit_3y_at_Z','sort_below_mintax_bycountry_at_Z','ticker','index_event','index_not_event']].dropna()\n",
    "\n",
    "df_reg['sort_intl_incomeshare_avg3_Z_event'] = np.where(df_reg['index_event']==1, df_reg['sort_intl_incomeshare_avg3_Z'],0)\n",
    "df_reg['sort_foreigntaxbenefit_3y_at_Z_event'] = np.where(df_reg['index_event']==1, df_reg['sort_foreigntaxbenefit_3y_at_Z'],0)\n",
    "df_reg['sort_below_mintax_bycountry_at_Z_event'] = np.where(df_reg['index_event']==1, df_reg['sort_below_mintax_bycountry_at_Z'],0)    \n",
    "\n",
    "exog_var = [  'Const','index_event','sort_intl_incomeshare_avg3_Z', 'sort_intl_incomeshare_avg3_Z_event'  ,'sort_foreigntaxbenefit_3y_at_Z' ,'sort_foreigntaxbenefit_3y_at_Z_event',\\\n",
    "            'sort_below_mintax_bycountry_at_Z','sort_below_mintax_bycountry_at_Z_event']\n",
    "\n",
    "df_reg = df_reg.set_index(['ticker','date'])\n",
    "df_reg['Const'] = 1\n",
    "\n",
    "for index_reg in columns_sort:\n",
    "\n",
    "    if index_reg == 'Raw':\n",
    "        return_select = 'ret'\n",
    "        \n",
    "    if index_reg == 'CAPM':\n",
    "        return_select = 'exret_FF1'   \n",
    "        \n",
    "    if index_reg == 'FF3':\n",
    "        return_select = 'exret_FF3'   \n",
    "        \n",
    "    if index_reg == 'FF4':\n",
    "        return_select = 'exret_FF4' \n",
    "        \n",
    "    exog = df_reg[exog_var ]\n",
    "    enog  = df_reg[return_select]*100 *100\n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True) \n",
    "    params = results.params\n",
    "    tvalues = results.tstats\n",
    "    nobs = results.nobs\n",
    "    R2 = results.rsquared*100\n",
    "\n",
    "    coef_results[index_reg].loc['const'] = params[0]\n",
    "    coef_results[index_reg].loc['const_T'] = tvalues[0]    \n",
    "    coef_results[index_reg].loc['TaxEvent'] = params[1]\n",
    "    coef_results[index_reg].loc['TaxEvent_T'] = tvalues[1]   \n",
    "    coef_results[index_reg].loc['Foreign Earnings Ratio'] = params[2]\n",
    "    coef_results[index_reg].loc['Foreign Earnings Ratio T'] = tvalues[2]  \n",
    "    coef_results[index_reg].loc['TaxEvent \\times Foreign Earnings Ratio'] = params[3]\n",
    "    coef_results[index_reg].loc['TaxEvent \\times Foreign Earnings Ratio T'] = tvalues[3]  \n",
    "    coef_results[index_reg].loc['Foreign Low Tax Benefit To Assets'] = params[4]\n",
    "    coef_results[index_reg].loc['Foreign Low Tax Benefit To Assets T'] = tvalues[4]  \n",
    "    coef_results[index_reg].loc['TaxEvent \\times Foreign Low Tax Benefit To Assets'] = params[5]\n",
    "    coef_results[index_reg].loc['TaxEvent \\times Foreign Low Tax Benefit To Assets T'] = tvalues[5]        \n",
    "    coef_results[index_reg].loc['ForeignTax_belowMin'] = params[6]\n",
    "    coef_results[index_reg].loc['ForeignTax_belowMin_T'] = tvalues[6]  \n",
    "    coef_results[index_reg].loc['TaxEvent \\times ForeignTax_belowMin'] = params[7]\n",
    "    coef_results[index_reg].loc['TaxEvent \\times ForeignTax_belowMin_T'] = tvalues[7]   \n",
    "    coef_results[index_reg].loc['R2'] = R2\n",
    "    coef_results[index_reg].loc['nObs'] = nobs \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "id": "66c54ad3-4e39-4b6d-954e-7e7729aff94e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Event study plot\n",
    "df_event_plot = pd.read_stata('StockReturnsIntraDay_series.dta')\n",
    "df_event_plot = pd.merge(df_event_plot,df_comp_sort, how = 'inner', on= 'ticker')\n",
    "\n",
    "break_low = 10\n",
    "break_high = 90\n",
    "\n",
    "break_low_df = df_event_plot[['sort_below_mintax_bycountry_at']].describe(percentiles=[break_low/100]).T[[ str(break_low) + '%']]\n",
    "break_low_df = break_low_df.rename(columns={str(break_low) + '%':'Low'})\n",
    "break_high_df = df_event_plot[['sort_below_mintax_bycountry_at']].describe(percentiles=[break_high/100]).T[[ str(break_high) + '%']]\n",
    "break_high_df = break_high_df.rename(columns={str(break_high) + '%':'High'})\n",
    "breaks_temp = pd.merge(break_low_df, break_high_df, how = 'inner', left_index = True, right_index = True)\n",
    "\n",
    "df_event_plot['bins'] = 0 \n",
    "df_event_plot['bins'] = np.where(df_event_plot['sort_below_mintax_bycountry_at'] <= breaks_temp['Low'][0], -1, df_event_plot['bins'])    \n",
    "df_event_plot['bins'] = np.where(df_event_plot['sort_below_mintax_bycountry_at'] > breaks_temp['High'][0], 1, df_event_plot['bins'])   \n",
    "\n",
    "results_df_all = pd.DataFrame()  \n",
    "    \n",
    "min_announcements = list(df_event_plot['time_difference'].unique())\n",
    "\n",
    "for index_min in min_announcements:\n",
    "    \n",
    "    df_bins_reg = df_event_plot[df_event_plot['time_difference'] == index_min]    \n",
    "    cols_to_dummies = \"bins\" \n",
    "\n",
    "    df_with_dummies = pd.get_dummies(df_bins_reg, columns=[cols_to_dummies], drop_first=False)\n",
    "    df_bins_surprise= pd.concat([df_bins_reg[cols_to_dummies], df_with_dummies], axis=1)\n",
    "\n",
    "    columns_dummy  = [x for x in df_bins_surprise if x.startswith('bins_')]\n",
    "\n",
    "    df_plot_reg = df_bins_surprise[['ticker','Date','price_norm'] + columns_dummy ]\n",
    "    df_plot_reg = df_plot_reg.set_index(['ticker','Date']) \n",
    "\n",
    "    exog_var  = columns_dummy\n",
    "    df_plot_reg = df_plot_reg.dropna(subset = exog_var + ['price_norm'])\n",
    "    exog = df_plot_reg[exog_var ]\n",
    "    enog  = df_plot_reg['price_norm'] \n",
    "\n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "\n",
    "    params_est =pd.DataFrame(results.params)  \n",
    "    tvals_est = pd.DataFrame(results.tstats) \n",
    "    std_errors = pd.DataFrame(results.std_errors) \n",
    "\n",
    "    results_df = pd.merge(params_est,tvals_est, how = 'inner', left_index = True, right_index = True)\n",
    "    results_df = pd.merge(results_df,std_errors, how = 'inner', left_index = True, right_index = True).T\n",
    "    results_df['Minute'] = index_min\n",
    "\n",
    "    results_df_all = results_df_all.append(results_df)\n",
    "\n",
    "# plot the estimated coefficients\n",
    "def get_df_plot(df_t,select_var,select_sig):\n",
    "    df_t = df_t[['Minute', select_var]]\n",
    "\n",
    "    df_t_param = df_t[df_t.index == 'parameter'].reset_index(drop = True).set_index(['Minute'])\n",
    "    df_t_param.columns = ['a']\n",
    "    df_t_param = df_t_param.reset_index()\n",
    "   \n",
    "\n",
    "    df_t_se = df_t[df_t.index == 'std_error'].reset_index(drop = True).set_index(['Minute'])\n",
    "    df_t_se.columns = ['a_se']\n",
    "    df_t_se = df_t_se.reset_index()\n",
    "\n",
    "\n",
    "    \n",
    "    df_t_2 = pd.merge(df_t_param, df_t_se, how = 'outer', on = 'Minute')\n",
    "\n",
    "\n",
    "    df_t_2['lb'] = df_t_2['a'] - 1.96*df_t_2['a_se']\n",
    "    df_t_2['ub'] = df_t_2['a'] + 1.96*df_t_2['a_se']\n",
    "\n",
    "    return df_t_2\n",
    "\n",
    "df_plot = results_df_all.sort_values(['Minute'])\n",
    "\n",
    "select_var = 'bins_-1'\n",
    "df_low = get_df_plot(df_plot,select_var,select_sig)\n",
    "\n",
    "select_var = 'bins_1'\n",
    "df_high = get_df_plot(df_plot,select_var,select_sig)\n",
    "\n",
    "fig, axes = plt.subplots(1, 1 , figsize=(16,6), sharex=True, sharey=False)\n",
    "\n",
    "xdata_low = df_low['Minute']\n",
    "ydata_low = df_low['a'] \n",
    "ydata_lb_low = df_low['ub'] \n",
    "ydata_ub_low = df_low['lb'] \n",
    "\n",
    "xdata_high = df_high['Minute']\n",
    "ydata_high = df_high['a']\n",
    "ydata_lb_high = df_high['ub']\n",
    "ydata_ub_high = df_high['lb']\n",
    "\n",
    "plt.plot(xdata_high, ydata_high, '-',color = 'darkblue', linewidth=3)\n",
    "plt.plot(xdata_low, ydata_low, '-',color = 'tab:red', linewidth=3)\n",
    "plt.legend(labels=['High exposure companies', 'Low exposure companies'], loc='upper left'  , fontsize='25',frameon=False)\n",
    "\n",
    "plt.fill_between(xdata_high, ydata_lb_high, ydata_ub_high, color='darkblue', alpha=0.1)\n",
    "plt.fill_between(xdata_low, ydata_lb_low, ydata_ub_low, color='tab:red', alpha=0.1)\n",
    "\n",
    "plt.ylabel('Cumulative price change in basis points', fontsize='20')\n",
    "plt.xlabel('Minutes around the event', fontsize='20')\n",
    "axes.spines['top'].set_visible(False)\n",
    "axes.spines['right'].set_visible(False)  \n",
    "axes.xaxis.set_tick_params(labelsize=19)\n",
    "axes.yaxis.set_tick_params(labelsize=19)\n",
    "\n",
    "plt.axhline(y=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.axvline(x=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "\n",
    "plt.xticks([-20,-10, -5, 0,  10, 20,30 , 60,80,100])\n",
    "\n",
    "plt.axhline(y=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.axvline(x=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.tight_layout()    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ef461a6-eec4-445b-a132-e80837232bc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# differential effect between bin -1 and in 1\n",
    "results_df_all_diff = pd.DataFrame()\n",
    "min_announcements = list(df_event_plot['time_difference'].unique())\n",
    "\n",
    "for index_min in min_announcements:\n",
    "\n",
    "    df_bins_reg = df_event_plot[df_event_plot['time_difference'] == index_min]\n",
    "    df_bins_reg = df_bins_reg[df_bins_reg['bins'] != 0]\n",
    "\n",
    "    df_bins_reg['Dummy_exposed'] = np.where(df_bins_reg['bins']==1,1,0 )\n",
    "    df_plot_reg = df_bins_reg[['ticker','date','price_norm','Dummy_exposed'] ]\n",
    "    df_plot_reg = df_plot_reg.set_index(['ticker','Date']) \n",
    "    df_plot_reg['Const'] = 1\n",
    "\n",
    "    exog_var  = ['Const','Dummy_exposed']\n",
    "    df_plot_reg = df_plot_reg.dropna(subset = exog_var + ['price_norm'])\n",
    "    exog = df_plot_reg[exog_var ]\n",
    "    enog  = df_plot_reg['price_norm'] \n",
    "\n",
    "    mod = PanelOLS(enog, exog)\n",
    "    results = mod.fit(cov_type='clustered' , cluster_entity = True, cluster_time=True)\n",
    "\n",
    "    params_est =pd.DataFrame(results.params)  \n",
    "    tvals_est = pd.DataFrame(results.tstats) \n",
    "    std_errors = pd.DataFrame(results.std_errors) \n",
    "\n",
    "    results_df = pd.merge(params_est,tvals_est, how = 'inner', left_index = True, right_index = True)\n",
    "    results_df = pd.merge(results_df,std_errors, how = 'inner', left_index = True, right_index = True).T\n",
    "    results_df['Minute'] = index_min\n",
    "\n",
    "    results_df_all_diff = results_df_all_diff.append(results_df)\n",
    "\n",
    "# get difference + standard errors\n",
    "df_t = results_df_all_diff[['Minute', 'Dummy_exposed']]\n",
    "df_t_param = df_t[df_t.index == 'parameter'].reset_index(drop = True).set_index(['Minute'])\n",
    "\n",
    "df_t_param.columns = ['d']\n",
    "df_t_param = df_t_param.reset_index()\n",
    "\n",
    "\n",
    "df_t_se = df_t[df_t.index == 'std_error'].reset_index(drop = True).set_index(['Minute'])\n",
    "df_t_se.columns = ['d_se']\n",
    "df_t_se = df_t_se.reset_index()\n",
    "df_t_2 = pd.merge(df_t_param, df_t_se, how = 'outer', on = 'Minute')\n",
    "df_t_2['lb'] = df_t_2['d'] - 1.96*df_t_2['d_se']\n",
    "df_t_2['ub'] = df_t_2['d']+1.96*df_t_2['d_se']\n",
    "\n",
    "df_plot_diff = df_t_2.copy()\n",
    "\n",
    "fig, ax = plt.subplots(1, 1 , figsize=(16,6), sharex=True, sharey=False)\n",
    "\n",
    "xdata = df_plot_diff['Minute']\n",
    "ydata = df_plot_diff['d'] \n",
    "ydata_lb = df_plot_diff['ub'] \n",
    "ydata_ub = df_plot_diff['lb'] \n",
    "\n",
    "plt.plot(xdata, ydata, '-',color = 'darkblue', linewidth=3)\n",
    "plt.fill_between(xdata, ydata_lb, ydata_ub, color='darkblue', alpha=0.1)\n",
    "plt.axhline(y=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.axvline(x=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "\n",
    "ax.spines['top'].set_visible(False)\n",
    "ax.spines['right'].set_visible(False)  \n",
    "ax.xaxis.set_tick_params(labelsize=15)\n",
    "ax.yaxis.set_tick_params(labelsize=15)\n",
    "\n",
    "\n",
    "plt.ylabel('Abnormal cumulative returns in basis points', fontsize='15')\n",
    "plt.xlabel('Minutes around the event', fontsize='15')\n",
    "plt.legend(labels=['High exposure companies minus low exposure companies'], loc='upper right'  , fontsize='18',frameon=False)\n",
    "\n",
    "\n",
    "plt.xticks([-20,-10, -5, 0,  10, 20,30 , 60,80,100])\n",
    "\n",
    "plt.axhline(y=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.axvline(x=0,linestyle =':',color='black',label=r'Announcement',lw=2) \n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d89cecf7-85e4-4e86-bf9f-90bdb23b30dd",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "96389517-5c64-4dd7-a2b5-f9854567288a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a190d4e9-1d0b-4ef1-a977-3e3049b2ef0a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49623769-9af5-47e8-9abf-a0272fb54460",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a3dbecd7-02ad-4269-9e6b-c51f57edf101",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
